################################################################################
## Purpose: Replication Code for Figure 4 and Table A8
## Project: Missionary Activity, Education, and Long-run Political Development  
## Written by: Soeren J. Henn  
## Last updated: 2025-05-22
################################################################################

# This R-Markdown file generates Figures 4 and Tables A8 of the paper 
# "Missionary Activity, Education, and Long-run Political Development: Evidence 
# Across Regime Types in Africa" by Soeren J. Henn, Horacio Larreguy, and Carlos
# Schmidt-Padilla. Please see the ReadMe file in the replication archive for 
# further information.

rm(list = ls())

# Add location of replication archive below
rootArg = "~/Dropbox/Missionaries/Replication Archive"

dataDir = paste0(rootArg,"/data")
codeDir = paste0(rootArg,"/code")
outDir = paste0(rootArg,"/output")

library(ggplot2)
library(lfe)
library(stargazer)

################################################################################
########################## Load and prepare data ###############################
################################################################################

setwd(dataDir)
all.data <- readRDS("GriddataWithCovariates.rds")

all.data$border.side <- paste(all.data$country.x, all.data$own.D, all.data$nei.D, sep=".")

for(var in c("temperature", "rainfall", "elevation", "ruggedness", "malaria1", "malaria2", "tsetse_palp", "tsetse_mors", "tsetse_fusca", "Log_dist_explroutes", "Log_dist_rail", "Log_dist_waterways", "Log_dist_capital", "Log_dist_coast", "diamonds_within50", "oil_within50", "gas_within50", "agriculture", "cashcrop", "Log_dist_histcities", "Log_dist_natborder", "Log_dist_diamonds", "Log_dist_gas", "Log_dist_oil")){
  all.data[,paste("z_", var, sep="")]<- scale(all.data[,var])
}

################################################################################
## Figure 4: Effect of Proximity to Diocese Headquarters of Modern-Day Schools #
################################################################################

## Winsorize data at 95 percentile
quantile(all.data$schools, c(.75, .90, .95, .99))
all.data$schools_win <- all.data$schools
all.data$schools_win[which(all.data$schools_win>25)] <- 25

ggplot(all.data, aes(distance_hq_log, schools_win)) +
  geom_point(shape = 16, size = 3, show.legend = FALSE, alpha=0.1) +
  scale_alpha(range = c(.05, .25)) +
  stat_summary_bin(fun.y='mean', bins=100,
                   color='orange', size=2, geom='point') +
  geom_smooth() +
  geom_smooth(method = 'lm', color='red') +
  ylab("Schools per Grid") +
  xlab("Log Distance of Grid Centroid to Diocese HQ") +
  theme_minimal()
setwd(outDir)
ggsave("fig_4.png", width = 5, height = 5)

################################################################################
## Table A8: Effect of Proximity to Diocese Headquarters on Modern-Day Schools per Grid Controlling for Population Density 1880
################################################################################

all.data$border.side <- paste(all.data$country.x, all.data$own.D, all.data$nei.D, sep=".")

for(var in c("temperature", "rainfall", "elevation", "ruggedness", "malaria1", "malaria2", "tsetse_palp", "tsetse_mors", "tsetse_fusca", "Log_dist_explroutes", "Log_dist_rail", "Log_dist_waterways", "Log_dist_capital", "Log_dist_coast", "diamonds_within50", "oil_within50", "gas_within50", "agriculture", "cashcrop", "Log_dist_histcities", "Log_dist_natborder", "Log_dist_diamonds", "Log_dist_gas", "Log_dist_oil")){
  all.data[,paste("z_", var, sep="")]<- scale(all.data[,var])
}

# Drop observations within 5km of boundary because gridcells are not assigned to one diocese
all.data <- all.data[which(all.data$distance_boundary>5),]
# Drop diocese of Zanzibar
all.data <- all.data[which(all.data$Name!="Sansibar"),]

data.reg <- all.data[!is.na(all.data$treatment_log10),]
model10 <- felm(schools ~ proximity + popden.1880 + index_geo + index_location + index_NR | fixed_effects |0| border.side, data.reg)

data.reg <- all.data[!is.na(all.data$treatment_log15),]
model15 <- felm(schools ~ proximity + distance_boundary*treatment_log15 + popden.1880 + index_geo + index_location + index_NR | fixed_effects |0| border.side, data.reg)

data.reg <- all.data[!is.na(all.data$treatment_log20),]
model20 <- felm(schools ~ proximity + distance_boundary*treatment_log20 + popden.1880 + index_geo + index_location + index_NR | fixed_effects |0| border.side, data.reg)

data.reg <- all.data[!is.na(all.data$treatment_log25),]
model25 <- felm(schools ~ proximity + distance_boundary*treatment_log25 + popden.1880 + index_geo + index_location + index_NR | fixed_effects |0| border.side, data.reg)

data.reg <- all.data[!is.na(all.data$treatment_log50),]
model50 <- felm(schools ~ proximity + distance_boundary*treatment_log50 + popden.1880 + index_geo + index_location + index_NR | fixed_effects |0| border.side, data.reg)

setwd(outDir)
stargazer(model10, model15, model20, model25, model50, type="latex",
          title="Effect of Proximity to Diocese Head on Schools per Grid Today Controlling for PopDensity 1880",
          keep=c("proximity"),
          dep.var.labels = c("Schools per Grid"),
          column.labels=c("10km", "15km", "20km", "25km", "50km"),
          covariate.labels=c("Proximity to Diocese Headquarters"),
          keep.stat=c("n","adj.rsq"),
          add.lines = list(c("Fixed effects?", "Yes", "Yes", "Yes", "Yes", "Yes"),
                           c("Cluster", "BS", "BS", "BS", "BS", "BS")),
          font.size="small",
          notes.label = "Clustered Standard errors in parentheses",
          label="measure",
          out="table_a8.tex")
